#***************************************************************#
#                                                               #
#                    Replication file for:                      #
#                                                               #
#  "The long-term effects of war exposure on civic engagement"  #
#                        Barceló, Joan                          #
#    Proceedings of the National Academy of Sciences (PNAS)     #
#                 Last updated: 1 November, 2020                #
#                 Correction: May 13, 2023                      #
#                                                               #
#****************************************************************

#################
#               #
# Load packages #
#               #
#################

library(dplyr)
library(plyr)
library(foreign) 
library(psych) 
library(boot) 
library(stargazer)
library(MASS) 
library(Hmisc) 
library(xtable)
library(lme4) 
library(rms) 
library(car) 
library(lmtest)
library(sandwich)
library(AER)
library(tidyverse)
library(rlang)
library(sf)
library(spdep)
library(lwgeom)
library(fields)
library(data.table)
library(readstata13)
library(sphet)
library(fields)
library(rgdal)
library(raster)
library(texreg)
library(ggplot2)

#################
#               #
#   Load data   #
#               #
#################

#cd <- getwd()
#setwd(cd)
setwd("PATH")
pnas_surveydata <- read.dta13("pnas_surveydata.dta")

pnas_surveydata$diff17_birth_corr <- abs(pnas_surveydata$latitude_birth_corr - 17)
pnas_surveydata$diff17_wartime_corr <- abs(pnas_surveydata$latitude_wartime_corr - 17)
pnas_surveydata$diff17_contemp_corr <- abs(pnas_surveydata$latitude_contemp_corr - 17)

pnas_surveydata$diff_latitude_2001_1975_corr <- pnas_surveydata$latitude_contemp_corr - pnas_surveydata$latitude_wartime_corr
pnas_surveydata$diff17 <- pnas_surveydata$diff17_contemp_corr - pnas_surveydata$diff17_wartime_corr

###########
#         #
# FIGURE 1#
#         #
###########

pnas_surveydata_plot <- pnas_surveydata[, c("province_wartime", "bombs_per_sqm_log_wartime", 
                                            "latitude_wartime_corr", "diff17_wartime_corr")]

#pnas_surveydata_plot <- unique(pnas_surveydata_plot)

#write.dta(pnas_surveydata_plot, "pnas_provincecentroid_corrected_shared.dta")

figure1b <- ggplot(pnas_surveydata_plot, aes(x=diff17_wartime_corr, y=bombs_per_sqm_log_wartime)) +
  geom_point(colour="#67000D", show.legend = F, size = 3) +
  scale_y_continuous(name="Total Bombs, per squared km (log)") +
  scale_x_continuous(name="Distance to the 17th Parallel") + theme_bw()
figure1b <- figure1b + theme(axis.title.x = element_text(size=18, face="bold"),
                       axis.text.x  = element_text(size=14, angle=0)) 
figure1b <- figure1b + theme(axis.title = element_text(size=18, color = "black", face="bold"),
                       axis.text  = element_text(angle=0, size=14))
figure1b

png(filename="Figure1B_corrected.png", 
    units="in", 
    width=8, 
    height=10, 
    pointsize=15, 
    res=1000)
figure1b
dev.off()

###########
#         #
# TABLE 1 #
#         #
###########

summary(table1_column1 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime, data= pnas_surveydata))

N <- nobs(table1_column1)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table1_column1$df.residual

cov <- dfa * vcovHC(table1_column1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab1col1 <- sqrt(diag(cov))
coeftest(table1_column1, vcov=cov)

summary(table1_column2 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education 
                     + I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100)
                     + south_wartime + I(latitude_wartime_corr/10), data = pnas_surveydata))

N <- nobs(table1_column2)
dfa <- (G/(G - 1)) * (N - 1)/table1_column2$df.residual

cov <- dfa * vcovHC(table1_column2, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab1col2 <- sqrt(diag(cov))
coeftest(table1_column2, vcov=cov)

summary(table1_column3 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime | 
                            . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table1_column3$n
dfa <- (G/(G - 1)) * (N - 1)/table1_column3$df.residual

cov <- dfa * vcovHC(table1_column3, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab1col3 <- sqrt(diag(cov))
coeftest(table1_column3, vcov=cov)

summary(table1_column3_1 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + I(popdensity6061_wartime/1000) | 
                                  . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table1_column3_1$n
dfa <- (G/(G - 1)) * (N - 1)/table1_column3.1$df.residual

cov <- dfa * vcovHC(table1_column3_1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab1col3_1 <- sqrt(diag(cov))
coeftest(table1_column3_1, vcov=cov)

summary(table1_column4 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100)
                                 + south_wartime + I(latitude_wartime_corr/10) | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                data= pnas_surveydata), diagnostics=TRUE)

N <- table1_column4$n
dfa <- (G/(G - 1)) * (N - 1)/table1_column4$df.residual

cov <- dfa * vcovHC(table1_column4, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab1col4 <- sqrt(diag(cov))
coeftest(table1_column4, vcov=cov)

stargazer(table1_column1, table1_column2, table1_column3, table1_column3_1, table1_column4,
          se = list(cluster_se_tab1col1, cluster_se_tab1col2, cluster_se_tab1col3, cluster_se_tab1col3_1, cluster_se_tab1col4),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05, 0.001), digits=2)

#####
#####
#Model relevance: first-stage regressions

mod_relevance1 <- lm(bombs_per_sqm_log_wartime ~ diff17_wartime_corr, data = pnas_surveydata)

linearHypothesis(mod_relevance1, 
                 "diff17_wartime_corr = 0", 
                 type = "HC0", cluster = ~province_wartime, adjust = T)

mod_relevance1.2 <- lm(bombs_per_sqm_log_wartime ~ popdensity6061_wartime + diff17_wartime_corr, data = pnas_surveydata)

linearHypothesis(mod_relevance1.2, 
                 "diff17_wartime_corr = 0", 
                 type = "HC0", cluster = ~province_wartime, adjust = T)

mod_relevance2 <- lm(bombs_per_sqm_log_wartime ~ female + age + education + 
                       popdensity6061_wartime + pre_avg_wartime + south_wartime + latitude_wartime_corr + diff17_wartime_corr, 
                     data = pnas_surveydata)

linearHypothesis(mod_relevance2, 
                 "diff17_wartime_corr = 0", 
                 type = "HC0", cluster = ~province_wartime, adjust = T)


#First-stage regression

summary(first_stage_nocontrols <- lm(bombs_per_sqm_log_wartime ~ diff17_wartime_corr, 
                                     data= pnas_surveydata), diagnostics=TRUE)

N <- nobs(first_stage_nocontrols)
dfa <- (G/(G - 1)) * (N - 1)/first_stage_nocontrols$df.residual

cov <- dfa * vcovHC(first_stage_nocontrols, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_firststage_nocontrols <- sqrt(diag(cov))
coeftest(first_stage_nocontrols, vcov=cov)

summary(table1_column3_with_population <- ivreg(log_civic_engagement ~ popdensity6061_wartime + bombs_per_sqm_log_wartime | 
                                  . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table1_column3_with_population$n
dfa <- (G/(G - 1)) * (N - 1)/table1_column3_with_population$df.residual

cov <- dfa * vcovHC(table1_column3_with_population, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab1col3_with_population <- sqrt(diag(cov))
coeftest(table1_column3_with_population, vcov=cov)

summary(first_stage_with_population <- lm(bombs_per_sqm_log_wartime ~ popdensity6061_wartime + diff17_wartime_corr, 
                                     data= pnas_surveydata), diagnostics=TRUE)

N <- nobs(first_stage_with_population)
dfa <- (G/(G - 1)) * (N - 1)/first_stage_with_population$df.residual

cov <- dfa * vcovHC(first_stage_with_population, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_firststage_with_population <- sqrt(diag(cov))
coeftest(first_stage_with_population, vcov=cov)

summary(first_stage_with_controls <- lm(bombs_per_sqm_log_wartime ~ female + age + education + popdensity6061_wartime 
                          + pre_avg_wartime + south_wartime + latitude_wartime_corr
                                 + diff17_wartime_corr, 
                                data= pnas_surveydata), diagnostics=TRUE)

N <- nobs(first_stage_with_controls)
dfa <- (G/(G - 1)) * (N - 1)/first_stage_with_controls$df.residual

cov <- dfa * vcovHC(first_stage_with_controls, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_first_stage_with_controls <- sqrt(diag(cov))
coeftest(first_stage_with_controls, vcov=cov)

car::linearHypothesis(first_stage_with_controls, c("diff17_wartime_corr = 0"))

##########
#########

###########
#         #
# TABLE 2 #
#         #
###########
###########
# PANEL A #
###########

summary(table2_column1_panel_a <- lm(voice_index ~ bombs_per_sqm_log_wartime, data = pnas_surveydata))

G <- length(unique(pnas_surveydata$province_wartime))-1
N <- nobs(table2_column1_panel_a)
dfa <- (G/(G - 1)) * (N - 1)/table2_column1_panel_a$df.residual

cov <- dfa * vcovHC(table2_column1_panel_a, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col1_panel_a <- sqrt(diag(cov))
coeftest(table2_column1_panel_a, vcov=cov)

summary(table2_column2_panel_a <- lm(voice_index ~ bombs_per_sqm_log_wartime + female + age + education + popdensity6061_wartime 
                                     + pre_avg_wartime + south_wartime + latitude_wartime_corr, data= pnas_surveydata))

N <- nobs(table2_column2_panel_a)
dfa <- (G/(G - 1)) * (N - 1)/table2_column2_panel_a$df.residual

cov <- dfa * vcovHC(table2_column2_panel_a, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col2_panel_a <- sqrt(diag(cov))
coeftest(table2_column2_panel_a, vcov=cov)

summary(table2_column3_panel_a <- ivreg(voice_index ~ bombs_per_sqm_log_wartime | 
                                 . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column3_panel_a$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column3_panel_a$df.residual

cov <- dfa * vcovHC(table2_column3_panel_a, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col3_panel_a <- sqrt(diag(cov))
coeftest(table2_column3_panel_a, vcov=cov)

summary(table2_column3.1_panel_a <- ivreg(voice_index ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                          . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column3.1_panel_a$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column3.1_panel_a$df.residual

cov <- dfa * vcovHC(table2_column3.1_panel_a, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col3.1_panel_a <- sqrt(diag(cov))
coeftest(table2_column3.1_panel_a, vcov=cov)

summary(table2_column4_panel_a <- ivreg(voice_index ~ bombs_per_sqm_log_wartime + female + age + education + popdensity6061_wartime 
                                        + pre_avg_wartime + south_wartime + latitude_wartime_corr | 
                                 . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column4_panel_a$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column4_panel_a$df.residual

cov <- dfa * vcovHC(table2_column4_panel_a, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col4_panel_a <- sqrt(diag(cov))
coeftest(table2_column4_panel_a, vcov=cov)


stargazer(table2_column1_panel_a, table2_column2_panel_a, table2_column3_panel_a,
          se        = list(cluster_se_tab2col1_panel_a, cluster_se_tab2col2_panel_a, cluster_se_tab2col3_panel_a),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05, 0.001), digits=2)

stargazer(table2_column3.1_panel_a, table2_column4_panel_a,
          se        = list(cluster_se_tab2col3.1_panel_a, cluster_se_tab2col4_panel_a),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05, 0.001), digits=2)

###########
# PANEL B #
###########

summary(table2_column1_panel_b <- lm(voice_item1 ~ bombs_per_sqm_log_wartime, data = pnas_surveydata))

N <- nobs(table2_column1_panel_b)
dfa <- (G/(G - 1)) * (N - 1)/table2_column1_panel_b$df.residual

cov <- dfa * vcovHC(table2_column1_panel_b, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col1_panel_b <- sqrt(diag(cov))
coeftest(table2_column1_panel_b, vcov=cov)

summary(table2_column2_panel_b <- lm(voice_item1 ~ bombs_per_sqm_log_wartime + female + age + education + popdensity6061_wartime 
                                     + pre_avg_wartime + south_wartime + latitude_wartime_corr, data= pnas_surveydata))

N <- nobs(table2_column2_panel_b)
dfa <- (G/(G - 1)) * (N - 1)/table2_column2_panel_b$df.residual

cov <- dfa * vcovHC(table2_column2_panel_b, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col2_panel_b <- sqrt(diag(cov))
coeftest(table2_column2_panel_b, vcov=cov)

summary(table2_column3_panel_b <- ivreg(voice_item1 ~ bombs_per_sqm_log_wartime | 
                                          . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column3_panel_b$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column3_panel_b$df.residual

cov <- dfa * vcovHC(table2_column3_panel_b, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col3_panel_b <- sqrt(diag(cov))
coeftest(table2_column3_panel_b, vcov=cov)

summary(table2_column3.1_panel_b <- ivreg(voice_item1 ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                          . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column3.1_panel_b$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column3.1_panel_b$df.residual

cov <- dfa * vcovHC(table2_column3.1_panel_b, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col3.1_panel_b <- sqrt(diag(cov))
coeftest(table2_column3.1_panel_b, vcov=cov)


summary(table2_column4_panel_b <- ivreg(voice_item1 ~ bombs_per_sqm_log_wartime + female + age + education + popdensity6061_wartime 
                                        + pre_avg_wartime + south_wartime + latitude_wartime_corr | 
                                          . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column4_panel_b$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column4_panel_b$df.residual

cov <- dfa * vcovHC(table2_column4_panel_b, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col4_panel_b <- sqrt(diag(cov))
coeftest(table2_column4_panel_b, vcov=cov)

stargazer(table2_column1_panel_b, table2_column2_panel_b, table2_column3_panel_b,
          se        = list(cluster_se_tab2col1_panel_b, cluster_se_tab2col2_panel_b, cluster_se_tab2col3_panel_b),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table2_column3.1_panel_b, table2_column4_panel_b,
          se        = list(cluster_se_tab2col3.1_panel_b, cluster_se_tab2col4_panel_b),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
# PANEL C #
###########

summary(table2_column1_panel_c <- lm(voice_item2 ~ bombs_per_sqm_log_wartime, data = pnas_surveydata))

N <- nobs(table2_column1_panel_c)
dfa <- (G/(G - 1)) * (N - 1)/table2_column1_panel_c$df.residual

cov <- dfa * vcovHC(table2_column1_panel_c, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col1_panel_c <- sqrt(diag(cov))
coeftest(table2_column1_panel_c, vcov=cov)

summary(table2_column2_panel_c <- lm(voice_item2 ~ bombs_per_sqm_log_wartime + female + age + education + popdensity6061_wartime 
                                     + pre_avg_wartime + south_wartime + latitude_wartime_corr, data= pnas_surveydata))

N <- nobs(table2_column2_panel_c)
dfa <- (G/(G - 1)) * (N - 1)/table2_column2_panel_c$df.residual

cov <- dfa * vcovHC(table2_column2_panel_c, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col2_panel_c <- sqrt(diag(cov))
coeftest(table2_column2_panel_c, vcov=cov)

summary(table2_column3_panel_c <- ivreg(voice_item2 ~ bombs_per_sqm_log_wartime | 
                                          . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column3_panel_c$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column3_panel_c$df.residual

cov <- dfa * vcovHC(table2_column3_panel_c, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col3_panel_c <- sqrt(diag(cov))
coeftest(table2_column3_panel_c, vcov=cov)

summary(table2_column3.1_panel_c <- ivreg(voice_item2 ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                          . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column3.1_panel_c$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column3.1_panel_c$df.residual

cov <- dfa * vcovHC(table2_column3.1_panel_c, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col3.1_panel_c <- sqrt(diag(cov))
coeftest(table2_column3.1_panel_c, vcov=cov)

summary(table2_column4_panel_c <- ivreg(voice_item2 ~ bombs_per_sqm_log_wartime + female + age + education + popdensity6061_wartime 
                                        + pre_avg_wartime + south_wartime + latitude_wartime_corr | 
                                          . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data= pnas_surveydata), diagnostics = TRUE)

N <- table2_column4_panel_c$n
dfa <- (G/(G - 1)) * (N - 1)/table2_column4_panel_c$df.residual

cov <- dfa * vcovHC(table2_column4_panel_c, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab2col4_panel_c <- sqrt(diag(cov))
coeftest(table2_column4_panel_c, vcov=cov)

stargazer(table2_column1_panel_c, table2_column2_panel_c, table2_column3_panel_c,
          se        = list(cluster_se_tab2col1_panel_c, cluster_se_tab2col2_panel_c, cluster_se_tab2col3_panel_c),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table2_column3.1_panel_c, table2_column4_panel_c,
          se        = list(cluster_se_tab2col3.1_panel_c, cluster_se_tab2col4_panel_c),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)


###########
#         #
# TABLE 3 #
#         #
###########
###########
#  ROW 1  #
###########

summary(table3_column1_row1 <- lm(log_civic_engagement_m ~ bombs_per_sqm_log_wartime, data = pnas_surveydata))

G <- length(unique(pnas_surveydata$province_wartime))-1
N <- nobs(table3_column1_row1)
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row1$df.residual

cov <- dfa * vcovHC(table3_column1_row1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row1 <- sqrt(diag(cov))
coeftest(table3_column1_row1, vcov=cov)


summary(table3_column2_row1 <- lm(log_civic_engagement_m ~ bombs_per_sqm_log_wartime + female + age + education +
                                    I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10), data = pnas_surveydata))

N <- nobs(table3_column2_row1)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row1$df.residual

cov <- dfa * vcovHC(table3_column2_row1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row1 <- sqrt(diag(cov))
coeftest(table3_column2_row1, vcov=cov)

summary(table3_column3_row1 <- ivreg(log_civic_engagement_m ~ bombs_per_sqm_log_wartime | 
                                       . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3_row1$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row1$df.residual

cov <- dfa * vcovHC(table3_column3_row1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row1 <- sqrt(diag(cov))
coeftest(table3_column3_row1, vcov=cov)

summary(table3_column3.1_row1 <- ivreg(log_civic_engagement_m ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                       . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3.1_row1$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row1$df.residual

cov <- dfa * vcovHC(table3_column3.1_row1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row1 <- sqrt(diag(cov))
coeftest(table3_column3.1_row1, vcov=cov)

summary(table3_column4_row1 <- ivreg(log_civic_engagement_m ~ bombs_per_sqm_log_wartime + female + age + education + 
                                       I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10)
                                     | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data= pnas_surveydata), diagnostics=TRUE)

N <- table3_column4_row1$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row1$df.residual

cov <- dfa * vcovHC(table3_column4_row1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row1 <- sqrt(diag(cov))
coeftest(table3_column4_row1, vcov=cov)

stargazer(table3_column1_row1, table3_column2_row1, table3_column3_row1,
          se = list(cluster_se_tab3col1row1, cluster_se_tab3col2row1, cluster_se_tab3col3row1),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row1, table3_column4_row1,
          se = list(cluster_se_tab3col3.1row1, cluster_se_tab3col4row1),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 2  #
###########

summary(table3_column1_row2 <- lm(log_civic_engagement ~ bombs_per_sqm_log_birth, data = pnas_surveydata))

G <- length(unique(pnas_surveydata$province_birth))-1
N <- nobs(table3_column1_row2)
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row2$df.residual

cov <- dfa * vcovHC(table3_column1_row2, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row2 <- sqrt(diag(cov))
coeftest(table3_column1_row2, vcov=cov)


summary(table3_column2_row2 <- lm(log_civic_engagement ~ bombs_per_sqm_log_birth + female + age + education +
                                    I(popdensity6061_birth/1000) + I(pre_avg_birth/100) + south_birth + I(latitude_birth_corr/10), data = pnas_surveydata))

N <- nobs(table3_column2_row2)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row2$df.residual

cov <- dfa * vcovHC(table3_column2_row2, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row2 <- sqrt(diag(cov))
coeftest(table3_column2_row2, vcov=cov)

summary(table3_column3_row2 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_birth | 
                                       . - bombs_per_sqm_log_birth + diff17_birth_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3_row2$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row2$df.residual

cov <- dfa * vcovHC(table3_column3_row2, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row2 <- sqrt(diag(cov))
coeftest(table3_column3_row2, vcov=cov)

summary(table3_column3.1_row2 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_birth + popdensity6061_birth | 
                                       . - bombs_per_sqm_log_birth + popdensity6061_birth + diff17_birth_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3.1_row2$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row2$df.residual

cov <- dfa * vcovHC(table3_column3.1_row2, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row2 <- sqrt(diag(cov))
coeftest(table3_column3.1_row2, vcov=cov)

summary(table3_column4_row2 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_birth + female + age + education + 
                                       I(popdensity6061_birth/1000) + I(pre_avg_birth/100) + south_birth + I(latitude_birth_corr/10) | . - bombs_per_sqm_log_birth + diff17_birth_corr, 
                                     data= pnas_surveydata), diagnostics=TRUE)

N <- table3_column4_row2$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row2$df.residual

cov <- dfa * vcovHC(table3_column4_row2, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row2 <- sqrt(diag(cov))
coeftest(table3_column4_row2, vcov=cov)

stargazer(table3_column1_row2, table3_column2_row2, table3_column3_row2, table3_column4_row2,
          se = list(cluster_se_tab3col1row2, cluster_se_tab3col2row2, cluster_se_tab3col3row2, cluster_se_tab3col4row2),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 3  #
###########

voluntary_org <- data.matrix(cbind(pnas_surveydata$org_soc, pnas_surveydata$chu_soc, pnas_surveydata$cul_soc, pnas_surveydata$lab_soc,
                                   pnas_surveydata$pp_soc, pnas_surveydata$loc_soc, pnas_surveydata$hum_soc, pnas_surveydata$env_soc,
                                   pnas_surveydata$prof_soc, pnas_surveydata$you_soc, pnas_surveydata$spo_soc, pnas_surveydata$wom_soc,
                                   pnas_surveydata$pea_soc, pnas_surveydata$hea_soc))

results_OLS1_excluding_items <- matrix(nrow=14,ncol=4)
results_OLS2_excluding_items <- matrix(nrow=14,ncol=4)
results_IV1_excluding_items <- matrix(nrow=14,ncol=4)
results_IV1.1_excluding_items <- matrix(nrow=14,ncol=4)
results_IV2_excluding_items <- matrix(nrow=14,ncol=4)

for (i in 1:14){
  fanalysis <- fa.poly(voluntary_org[,c(-i)], nfactors= 1, rotate="oblimin", scores="tenBerge")
  
  pnas_surveydata$log_voluntaryorg_excl <- log(fanalysis$scores$scores + 1)
  
  OLS1 <- lm(log_voluntaryorg_excl ~ bombs_per_sqm_log_birth, data= pnas_surveydata)
  
  G <- length(unique(pnas_surveydata$province_wartime))-1
  
  results_OLS1_excluding_items[i,] <- coeftest(OLS1, vcov=(G/(G - 1)) * (nobs(OLS1) - 1)/OLS1$df.residual * vcovHC(OLS1, type = "HC0", cluster = ~province_wartime, adjust = T))[2,]
  
  OLS2 <- lm(log_voluntaryorg_excl ~ bombs_per_sqm_log_wartime + female + age + education + popdensity6061_wartime + pre_avg_wartime + 
               south_wartime + latitude_wartime_corr, data= pnas_surveydata)
  results_OLS2_excluding_items[i,] <- coeftest(OLS2, vcov=(G/(G - 1)) * (nobs(OLS2) - 1)/OLS2$df.residual * vcovHC(OLS2, type = "HC0", cluster = ~province_wartime, adjust = T))[2,]
  
  IV1 <- ivreg(log_voluntaryorg_excl ~ bombs_per_sqm_log_wartime | 
                 . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data= pnas_surveydata)
  
  G <- length(unique(pnas_surveydata$bombs_per_sqm_log_birth))-1
  results_IV1_excluding_items[i,] <- coeftest(IV1, vcov=(G/(G - 1)) * (IV1$n - 1)/IV1$df.residual * vcovHC(IV1, type = "HC0", cluster = ~province_wartime, adjust = T))[2,]
  
  IV1.1 <- ivreg(log_voluntaryorg_excl ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                 . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, data= pnas_surveydata)
  
  G <- length(unique(pnas_surveydata$bombs_per_sqm_log_wartime))-1
  results_IV1.1_excluding_items[i,] <- coeftest(IV1.1, vcov=(G/(G - 1)) * (IV1.1$n - 1)/IV1.1$df.residual * vcovHC(IV1.1, type = "HC0", cluster = ~province_wartime, adjust = T))[2,]
  
  IV2 <- ivreg(log_voluntaryorg_excl ~ bombs_per_sqm_log_wartime + female + age + education + popdensity6061_wartime + pre_avg_wartime + 
                 south_wartime + latitude_wartime_corr | 
                 . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata)
  
  results_IV2_excluding_items[i,] <- coeftest(IV2, vcov=(G/(G - 1)) * (IV2$n - 1)/IV2$df.residual * vcovHC(IV2, type = "HC0", cluster = ~province_wartime, adjust = T))[2,]
  print(i)
}

round(results_OLS1_excluding_items[which(results_OLS1_excluding_items[,1] == min(results_OLS1_excluding_items[,1])),],2) #Row 3, column 1
round(results_OLS2_excluding_items[which(results_OLS2_excluding_items[,1] == min(results_OLS2_excluding_items[,1])),],2) #Row 3, column 2
round(results_IV1_excluding_items[which(results_IV1_excluding_items[,1] == min(results_IV1_excluding_items[,1])),],2) #Row 3, column 3
round(results_IV1.1_excluding_items[which(results_IV1.1_excluding_items[,1] == min(results_IV1.1_excluding_items[,1])),],2) #Row 3, column 4
round(results_IV2_excluding_items[which(results_IV2_excluding_items[,1] == min(results_IV2_excluding_items[,1])),],2) #Row 3, column 5

###########
#  ROW 4  #
###########

pnas_surveydata_exclpol <- pnas_surveydata[pnas_surveydata$pp_socm == 0 & pnas_surveydata$lab_socm == 0,]

summary(table3_column1_row4 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime, data = pnas_surveydata_exclpol))

G <- length(unique(pnas_surveydata_exclpol$province_wartime))-1
N <- nobs(table3_column1_row4)
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row4$df.residual

cov <- dfa * vcovHC(table3_column1_row4, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row4 <- sqrt(diag(cov))
coeftest(table3_column1_row4, vcov=cov)


summary(table3_column2_row4 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education +
                                    I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10),
                                  data = pnas_surveydata_exclpol))

N <- nobs(table3_column2_row4)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row4$df.residual

cov <- dfa * vcovHC(table3_column2_row4, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row4 <- sqrt(diag(cov))
coeftest(table3_column2_row4, vcov=cov)

summary(table3_column3_row4 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime | 
                                       . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata_exclpol), diagnostics=TRUE)

N <- table3_column3_row4$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row4$df.residual

cov <- dfa * vcovHC(table3_column3_row4, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row4 <- sqrt(diag(cov))
coeftest(table3_column3_row4, vcov=cov)

summary(table3_column3.1_row4 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                       . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, data = pnas_surveydata_exclpol), diagnostics=TRUE)

N <- table3_column3.1_row4$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row4$df.residual

cov <- dfa * vcovHC(table3_column3.1_row4, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row4 <- sqrt(diag(cov))
coeftest(table3_column3.1_row4, vcov=cov)

summary(table3_column4_row4 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + 
                                       I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10) 
                                     | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data= pnas_surveydata_exclpol), diagnostics=TRUE)

N <- table3_column4_row4$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row4$df.residual

cov <- dfa * vcovHC(table3_column4_row4, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row4 <- sqrt(diag(cov))
coeftest(table3_column4_row4, vcov=cov)

stargazer(table3_column1_row4, table3_column2_row4, table3_column3_row4,
          se = list(cluster_se_tab3col1row4, cluster_se_tab3col2row4, cluster_se_tab3col3row4),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row4, table3_column4_row4,
          se = list(cluster_se_tab3col3.1row4, cluster_se_tab3col4row4),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 5  #
###########

summary(table3_column1_row5 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + pp_socm + lab_socm, data = pnas_surveydata))

N <- nobs(table3_column1_row5)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row5$df.residual

cov <- dfa * vcovHC(table3_column1_row5, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row5 <- sqrt(diag(cov))
coeftest(table3_column1_row5, vcov=cov)

summary(table3_column2_row5 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education 
                                  +  I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10)
                                  + pp_socm + lab_socm, data = pnas_surveydata))

N <- nobs(table3_column2_row5)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row5$df.residual

cov <- dfa * vcovHC(table3_column2_row5, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row5 <- sqrt(diag(cov))
coeftest(table3_column2_row5, vcov=cov)

summary(table3_column3_row5 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + pp_socm + lab_socm | 
                                       . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3_row5$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row5$df.residual

cov <- dfa * vcovHC(table3_column3_row5, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row5 <- sqrt(diag(cov))
coeftest(table3_column3_row5, vcov=cov)

summary(table3_column3.1_row5 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + pp_socm + lab_socm + popdensity6061_wartime | 
                                       . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3.1_row5$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row5$df.residual

cov <- dfa * vcovHC(table3_column3.1_row5, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row5 <- sqrt(diag(cov))
coeftest(table3_column3.1_row5, vcov=cov)

summary(table3_column4_row5 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + 
                                       I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10) + 
                                       pp_socm + lab_socm | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data= pnas_surveydata), diagnostics=TRUE)

N <- table3_column4_row5$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row5$df.residual

cov <- dfa * vcovHC(table3_column4_row5, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row5 <- sqrt(diag(cov))
coeftest(table3_column4_row5, vcov=cov)

stargazer(table3_column1_row5, table3_column2_row5, table3_column3_row5,
          se = list(cluster_se_tab3col1row5, cluster_se_tab3col2row5, cluster_se_tab3col3row5),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row5, table3_column4_row5,
          se = list(cluster_se_tab3col3.1row5, cluster_se_tab3col4row5),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 6  #
###########

summary(table3_column1_row6 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime, 
                                  data = pnas_surveydata[-which(pnas_surveydata$province_wartime == "Quang Tri"),]))

N <- nobs(table3_column1_row6)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row6$df.residual

cov <- dfa * vcovHC(table3_column1_row6, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row6 <- sqrt(diag(cov))
coeftest(table3_column1_row6, vcov=cov)

summary(table3_column2_row6 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education +
                                    I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10),
                                  data = pnas_surveydata[-which(pnas_surveydata$province_wartime == "Quang Tri"),]))

N <- nobs(table3_column2_row6)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row6$df.residual

cov <- dfa * vcovHC(table3_column2_row6, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row6 <- sqrt(diag(cov))
coeftest(table3_column2_row6, vcov=cov)

summary(table3_column3_row6 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime | 
                                       . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data = pnas_surveydata[-which(pnas_surveydata$province_wartime == "Quang Tri"),]),
        diagnostics=TRUE)

N <- table3_column3_row6$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row6$df.residual

cov <- dfa * vcovHC(table3_column3_row6, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row6 <- sqrt(diag(cov))
coeftest(table3_column3_row6, vcov=cov)

summary(table3_column3.1_row6 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                       . - bombs_per_sqm_log_wartime + diff17_wartime_corr + popdensity6061_wartime, 
                                     data = pnas_surveydata[-which(pnas_surveydata$province_wartime == "Quang Tri"),]),
        diagnostics=TRUE)

N <- table3_column3.1_row6$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row6$df.residual

cov <- dfa * vcovHC(table3_column3.1_row6, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row6 <- sqrt(diag(cov))
coeftest(table3_column3.1_row6, vcov=cov)

summary(table3_column4_row6 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education +
                                       I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10) | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data = pnas_surveydata[-which(pnas_surveydata$province_wartime == "Quang Tri"),]), 
        diagnostics=TRUE)

N <- table3_column4_row6$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row6$df.residual

cov <- dfa * vcovHC(table3_column4_row6, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row6 <- sqrt(diag(cov))
coeftest(table3_column4_row6, vcov=cov)

stargazer(table3_column1_row6, table3_column2_row6, table3_column3_row6,
          se = list(cluster_se_tab3col1row6, cluster_se_tab3col2row6, cluster_se_tab3col3row6),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row6, table3_column4_row6,
          se = list(cluster_se_tab3col3.1row6, cluster_se_tab3col4row6),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 7  #
###########

summary(table3_column1_row7 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime, 
                                  data = subset(pnas_surveydata, bombs_per_sqm_log_wartime < 
                                                  round(quantile(pnas_surveydata$bombs_per_sqm_log_wartime, 0.80, na.rm=TRUE),1))))

N <- nobs(table3_column1_row7)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row7$df.residual

cov <- dfa * vcovHC(table3_column1_row7, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row7 <- sqrt(diag(cov))
coeftest(table3_column1_row7, vcov=cov)

summary(table3_column2_row7 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education +
                                    I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10),
                                  data = subset(pnas_surveydata, bombs_per_sqm_log_wartime < 
                                                  round(quantile(pnas_surveydata$bombs_per_sqm_log_wartime, 0.80, na.rm=TRUE),1))
))

N <- nobs(table3_column2_row7)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row7$df.residual

cov <- dfa * vcovHC(table3_column2_row7, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row7 <- sqrt(diag(cov))
coeftest(table3_column2_row7, vcov=cov)

summary(table3_column3_row7 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime | 
                                       . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data = subset(pnas_surveydata, bombs_per_sqm_log_wartime < 
                                                     round(quantile(pnas_surveydata$bombs_per_sqm_log_wartime, 0.80, na.rm=TRUE),1))),
        diagnostics=TRUE)

N <- table3_column3_row7$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row7$df.residual

cov <- dfa * vcovHC(table3_column3_row7, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row7 <- sqrt(diag(cov))
coeftest(table3_column3_row7, vcov=cov)

summary(table3_column3.1_row7 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                       . - bombs_per_sqm_log_wartime + diff17_wartime_corr + popdensity6061_wartime, 
                                     data = subset(pnas_surveydata, bombs_per_sqm_log_wartime < 
                                                     round(quantile(pnas_surveydata$bombs_per_sqm_log_wartime, 0.80, na.rm=TRUE),1))),
        diagnostics=TRUE)

N <- table3_column3.1_row7$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row7$df.residual

cov <- dfa * vcovHC(table3_column3.1_row7, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row7 <- sqrt(diag(cov))
coeftest(table3_column3.1_row7, vcov=cov)

summary(table3_column4_row7 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + 
                                       I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10) | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data = subset(pnas_surveydata, bombs_per_sqm_log_wartime < 
                                                     round(quantile(pnas_surveydata$bombs_per_sqm_log_wartime, 0.80, na.rm=TRUE),1))), 
        diagnostics=TRUE)

N <- table3_column4_row7$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row7$df.residual

cov <- dfa * vcovHC(table3_column4_row7, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row7 <- sqrt(diag(cov))
coeftest(table3_column4_row7, vcov=cov)

stargazer(table3_column1_row7, table3_column2_row7, table3_column3_row7,
          se = list(cluster_se_tab3col1row7, cluster_se_tab3col2row7, cluster_se_tab3col3row7),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row7, table3_column4_row7,
          se = list(cluster_se_tab3col3.1row7, cluster_se_tab3col4row7),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 8  #
###########

n_respondents <- count(pnas_surveydata,c('province_wartime'))
pnas_surveydata <- merge(pnas_surveydata, n_respondents, by = 'province_wartime')

summary(table3_column1_row8 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime, 
                                  data = subset(pnas_surveydata, n_respondents > 5)))

N <- nobs(table3_column1_row8)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row8$df.residual

cov <- dfa * vcovHC(table3_column1_row8, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row8 <- sqrt(diag(cov))
coeftest(table3_column1_row8, vcov=cov)

summary(table3_column2_row8 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education +
                                    I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10),
                                  data = subset(pnas_surveydata, n_respondents > 5)))

N <- nobs(table3_column2_row8)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row8$df.residual

cov <- dfa * vcovHC(table3_column2_row8, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row8 <- sqrt(diag(cov))
coeftest(table3_column2_row8, vcov=cov)

summary(table3_column3_row8 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime | 
                                       . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data = subset(pnas_surveydata, n_respondents > 5)),
        diagnostics=TRUE)

N <- table3_column3_row8$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row8$df.residual

cov <- dfa * vcovHC(table3_column3_row8, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row8 <- sqrt(diag(cov))
coeftest(table3_column3_row8, vcov=cov)

summary(table3_column3.1_row8 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                       . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, 
                                     data = subset(pnas_surveydata, n_respondents > 5)),
        diagnostics=TRUE)

N <- table3_column3.1_row8$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row8$df.residual

cov <- dfa * vcovHC(table3_column3.1_row8, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row8 <- sqrt(diag(cov))
coeftest(table3_column3.1_row8, vcov=cov)

summary(table3_column4_row8 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education +
                                       I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10)| . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                     data = subset(pnas_surveydata, n_respondents > 5)), 
        diagnostics=TRUE)

N <- table3_column4_row8$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row8$df.residual

cov <- dfa * vcovHC(table3_column4_row8, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row8 <- sqrt(diag(cov))
coeftest(table3_column4_row8, vcov=cov)

stargazer(table3_column1_row8, table3_column2_row8, table3_column3_row8,
          se = list(cluster_se_tab3col1row8, cluster_se_tab3col2row8, cluster_se_tab3col3row8),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)


stargazer(table3_column3.1_row8, table3_column4_row8,
          se = list(cluster_se_tab3col3.1row8, cluster_se_tab3col4row8),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 9  #
###########

summary(table3_column1_row9 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime, 
                                  data = subset(pnas_surveydata, south_wartime == 0)))

N <- nobs(table3_column1_row9)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row9$df.residual

cov <- dfa * vcovHC(table3_column1_row9, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row9 <- sqrt(diag(cov))
coeftest(table3_column1_row9, vcov=cov)

summary(table3_column2_row9 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education 
                                  + I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100),
                                  data = subset(pnas_surveydata, south_wartime == 0)))

N <- nobs(table3_column2_row9)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row9$df.residual

cov <- dfa * vcovHC(table3_column2_row9, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row9 <- sqrt(diag(cov))
coeftest(table3_column2_row9, vcov=cov)

summary(table3_column3.1_row9 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                       . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff_17_wartime, 
                                     data = subset(pnas_surveydata, south_wartime == 0)),
        diagnostics=TRUE)

N <- table3_column3.1_row9$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row9$df.residual

cov <- dfa * vcovHC(table3_column3.1_row9, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row9 <- sqrt(diag(cov))
coeftest(table3_column3.1_row9, vcov=cov)

summary(table3_column3_row9 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime | 
                                       . - bombs_per_sqm_log_wartime + diff_17_wartime, 
                                     data = subset(pnas_surveydata, south_wartime == 0)),
        diagnostics=TRUE)

N <- table3_column3_row9$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row9$df.residual

cov <- dfa * vcovHC(table3_column3_row9, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row9 <- sqrt(diag(cov))
coeftest(table3_column3_row9, vcov=cov)

summary(table3_column4_row9 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + 
                                       I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) | . - bombs_per_sqm_log_wartime + diff_17_wartime, 
                                     data = subset(pnas_surveydata, south_wartime == 0)), 
        diagnostics=TRUE)

N <- table3_column4_row9$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row9$df.residual

cov <- dfa * vcovHC(table3_column4_row9, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row9 <- sqrt(diag(cov))
coeftest(table3_column4_row9, vcov=cov)

stargazer(table3_column1_row9, table3_column2_row9, table3_column3_row9,
          se = list(cluster_se_tab3col1row9, cluster_se_tab3col2row9, cluster_se_tab3col3row9),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row9, table3_column4_row9,
          se = list(cluster_se_tab3col3.1row9, cluster_se_tab3col4row9),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 10  #
###########

summary(table3_column1_row10 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + as.factor(colonial_province_wartime), 
                                   data = pnas_surveydata))

N <- nobs(table3_column1_row10)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row10$df.residual

cov <- dfa * vcovHC(table3_column1_row10, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row10 <- sqrt(diag(cov))
coeftest(table3_column1_row10, vcov=cov)

summary(table3_column2_row10 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education +
                                     I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10)
                                   + as.factor(colonial_province_wartime), data = pnas_surveydata))

N <- nobs(table3_column2_row10)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row10$df.residual

cov <- dfa * vcovHC(table3_column2_row10, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row10 <- sqrt(diag(cov))
coeftest(table3_column2_row10, vcov=cov)

summary(table3_column3_row10 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + as.factor(colonial_province_wartime)  | 
                                        . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3_row10$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row10$df.residual

cov <- dfa * vcovHC(table3_column3_row10, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row10 <- sqrt(diag(cov))
coeftest(table3_column3_row10, vcov=cov)

summary(table3_column3.1_row10 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + as.factor(colonial_province_wartime) + popdensity6061_wartime | 
                                        . - bombs_per_sqm_log_wartime + popdensity6061_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3.1_row10$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row10$df.residual

cov <- dfa * vcovHC(table3_column3.1_row10, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row10 <- sqrt(diag(cov))
coeftest(table3_column3.1_row10, vcov=cov)

summary(table3_column4_row10 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + 
                                        I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime_corr/10) + as.factor(colonial_province_wartime) | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                      data= pnas_surveydata), diagnostics=TRUE)

N <- table3_column4_row10$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row10$df.residual

cov <- dfa * vcovHC(table3_column4_row10, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row10 <- sqrt(diag(cov))
coeftest(table3_column4_row10, vcov=cov)

stargazer(table3_column1_row10, table3_column2_row10, table3_column3_row10,
          se = list(cluster_se_tab3col1row10, cluster_se_tab3col2row10, cluster_se_tab3col3row10),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row10, table3_column4_row10,
          se = list(cluster_se_tab3col3.1row10, cluster_se_tab3col4row10),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 11  #
###########

pnas_surveydata$high_alt_bombs_per_sqm_log_wartime <- log((pnas_surveydata$tot_bmr - pnas_surveydata$Incendiary - pnas_surveydata$Rocket)/pnas_surveydata$area_sum + 1) 

pnas_surveydata$high_alt_bombs_per_sqm_log_wartime <- ifelse(is.na(pnas_surveydata$bombs_per_sqm_log_wartime), NA, pnas_surveydata$high_alt_bombs_per_sqm_log_wartime)

summary(table3_column1_row11 <- lm(log_civic_engagement ~ high_alt_bombs_per_sqm_log_wartime, data = pnas_surveydata))

N <- nobs(table3_column1_row11)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row11$df.residual

cov <- dfa * vcovHC(table3_column1_row11, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row11 <- sqrt(diag(cov))
coeftest(table3_column1_row11, vcov=cov)

summary(table3_column2_row11 <- lm(log_civic_engagement ~ high_alt_bombs_per_sqm_log_wartime + female + age + education 
                                   + I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime/10), 
                                   data = pnas_surveydata))

N <- nobs(table3_column2_row11)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row11$df.residual

cov <- dfa * vcovHC(table3_column2_row11, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row11 <- sqrt(diag(cov))
coeftest(table3_column2_row11, vcov=cov)

summary(table3_column3_row11 <- ivreg(log_civic_engagement ~ high_alt_bombs_per_sqm_log_wartime | 
                                        . - high_alt_bombs_per_sqm_log_wartime + diff_17_wartime, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3_row11$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row11$df.residual

cov <- dfa * vcovHC(table3_column3_row11, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row11 <- sqrt(diag(cov))
coeftest(table3_column3_row11, vcov=cov)

summary(table3_column3.1_row11 <- ivreg(log_civic_engagement ~ high_alt_bombs_per_sqm_log_wartime | 
                                        . - high_alt_bombs_per_sqm_log_wartime + diff_17_wartime, data = pnas_surveydata), diagnostics=TRUE)

N <- table3_column3.1_row11$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row11$df.residual

cov <- dfa * vcovHC(table3_column3.1_row11, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row11 <- sqrt(diag(cov))
coeftest(table3_column3.1_row11, vcov=cov)

summary(table3_column4_row11 <- ivreg(log_civic_engagement ~ high_alt_bombs_per_sqm_log_wartime + female + age + education + 
                                        + I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100) + south_wartime + I(latitude_wartime/10) | . - high_alt_bombs_per_sqm_log_wartime + diff_17_wartime, 
                                      data= pnas_surveydata), diagnostics=TRUE)

N <- table3_column4_row11$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row11$df.residual

cov <- dfa * vcovHC(table3_column4_row11, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row11 <- sqrt(diag(cov))
coeftest(table3_column4_row11, vcov=cov)

stargazer(table3_column1_row11, table3_column2_row11, table3_column3_row11,
          se = list(cluster_se_tab3col1row11, cluster_se_tab3col2row11, cluster_se_tab3col3row11),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row11, table3_column4_row11,
          se = list(cluster_se_tab3col3.1row11, cluster_se_tab3col4row11),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 12 #
###########

set.seed(19)
data <- table1_column4$model
N_sim <- nrow(data)*1.11
sim_size <- round(N_sim - nrow(data))
prob_of_selection <- exp(data$bombs_per_sqm_log_wartime)

index <- sample(1:nrow(data), sim_size, replace = TRUE, prob = prob_of_selection)

sim_data <- data[index, ]
sim_data$log_civic_engagement <- quantile(data$log_civic_engagement, 0.05)

colnames(data)[c(6,7,9)] <- c("popdensity6061_wartime", "pre_avg_wartime", "latitude_wartime_corr")
colnames(sim_data)[c(6,7,9)] <- c("popdensity6061_wartime", "pre_avg_wartime", "latitude_wartime_corr")

final_data <- rbind(data, sim_data)

summary(table3_column1_row12 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime, data = final_data))

G <- length(unique(final_data$province_wartime))-1
N <- nobs(table3_column1_row12)
dfa <- (G/(G - 1)) * (N - 1)/table3_column1_row12$df.residual

cov <- dfa * vcovHC(table3_column1_row12, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col1row12 <- sqrt(diag(cov))
coeftest(table3_column1_row12, vcov=cov)


summary(table3_column2_row12 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education 
                                   + popdensity6061_wartime + pre_avg_wartime + south_wartime + latitude_wartime_corr, data = final_data))

N <- nobs(table3_column2_row12)
dfa <- (G/(G - 1)) * (N - 1)/table3_column2_row12$df.residual

cov <- dfa * vcovHC(table3_column2_row12, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col2row12 <- sqrt(diag(cov))
coeftest(table3_column2_row12, vcov=cov)

summary(table3_column3_row12 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime | 
                                        . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = final_data), diagnostics=TRUE)

N <- table3_column3_row12$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3_row12$df.residual

cov <- dfa * vcovHC(table3_column3_row12, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3row12 <- sqrt(diag(cov))
coeftest(table3_column3_row12, vcov=cov)

summary(table3_column3.1_row12 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + popdensity6061_wartime | 
                                        . - bombs_per_sqm_log_wartime + diff17_wartime_corr + popdensity6061_wartime, data = final_data), diagnostics=TRUE)

N <- table3_column3.1_row12$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column3.1_row12$df.residual

cov <- dfa * vcovHC(table3_column3.1_row12, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col3.1row12 <- sqrt(diag(cov))
coeftest(table3_column3.1_row12, vcov=cov)

summary(table3_column4_row12 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education
                                      + popdensity6061_wartime + pre_avg_wartime + south_wartime + latitude_wartime_corr | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, 
                                      data= final_data), diagnostics=TRUE)

N <- table3_column4_row12$n
dfa <- (G/(G - 1)) * (N - 1)/table3_column4_row12$df.residual

cov <- dfa * vcovHC(table3_column4_row12, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab3col4row12 <- sqrt(diag(cov))
coeftest(table3_column4_row12, vcov=cov)

stargazer(table3_column1_row12, table3_column2_row12, table3_column3_row12,
          se = list(cluster_se_tab3col1row12, cluster_se_tab3col2row12, cluster_se_tab3col3row12),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table3_column3.1_row12, table3_column4_row12,
          se = list(cluster_se_tab3col3.1row12, cluster_se_tab3col4row12),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

###########
#  ROW 13  #
###########
library(rgdal)
library(raster)

link<-"https://data.opendevelopmentmekong.net/dataset/999c96d8-fae0-4b82-9a2b-e481f6f50e12/resource/2818c2c5-e9c3-440b-a9b8-3029d7298065/download/diaphantinhenglish.geojson?fbclid=IwAR1coUVLkuEoJRsgaH81q6ocz1nVeGBirqpKRBN8WWxXQIJREUL1buFi1eE"

vn_spatial<-sf::st_read(link)

vn_spatial$Name

fromto <- read.table(text="
from to
š s
œ oe
ž z
ß ss
þ y
à a
á a
â a
ã a
ä a
å a
ẵ a
ắ a
ả a
ả a
ầ a
ạ a
ậ a
ă a
ằ a
æ ae
ç c
Đ D
ề e
è e
é e
ê e
ë e
ệ e
ế e
ì i
í i
î i
ï i
ĩ i
ị i
ị i
ð d
ñ n
ò o
ó o
ồ o
ơ o
ớ o
ộ o
ô o
õ o
ö o
ọ o
ồ o
ø oe
ù u
ú u
û u
ü u
ừ u
ư u
ư u
ũ u
ý y
ÿ y
ğ g",header=TRUE)

replaceforeignchars <- function(dat,fromto) {
  for(i in 1:nrow(fromto) ) {
    dat <- gsub(fromto$from[i],fromto$to[i],dat)
  }
  dat
}

viet_coord<-as.data.frame(cbind(vn_spatial$Name, 
                                st_coordinates(st_centroid(st_geometry(vn_spatial),
                                                           of_largest_polygon=T))))

colnames(viet_coord)<-c("province_wartime","long","lat")

head(viet_coord)

viet_coord$province_wartime<-replaceforeignchars(viet_coord$province_wartime,
                                                 fromto)



pnas_surveydata[which(pnas_surveydata$province_wartime=="Ho Chi Minh (City)"),]$province_wartime<-"Ho Chi Minh"

pnas_surveydata[which(pnas_surveydata$province_wartime=="Hai Phong (City)"),]$province_wartime<-"Hai Phong"

pnas_surveydata[which(pnas_surveydata$province_wartime=="Da Nang (City)"),]$province_wartime<-"Da Nang"

pnas_surveydata[which(pnas_surveydata$province_wartime=="Ha Noi (City)"),]$province_wartime<-"Ha Noi"


unique(pnas_surveydata$province_wartime)

viet_coord$province_wartime

unique(pnas_surveydata$province_wartime)%in%viet_coord$province_wartime

viet_coord$province_wartime[viet_coord$province_wartime=="TP. Ho Chi Minh"]<-"Ho Chi Minh"

viet_coord$province_wartime[viet_coord$province_wartime=="Thuathien-Hue"]<-"Thua Thien - Hue"

viet_coord$province_wartime[viet_coord$province_wartime=="Ba Ria - Vung Tau"]<-"Ba Ria"

viet_spatial<-merge(pnas_surveydata,viet_coord,by="province_wartime",all.x=T)

viet_spatial1<-viet_spatial[,c("log_civic_engagement", 
                               "bombs_per_sqm_log_wartime","lat","long","diff17_wartime_corr")]

viet_spatial1<-viet_spatial1[which(complete.cases(viet_spatial1)),]

mycoords<-coordinates(cbind(as.numeric(viet_spatial1$lat), 
                            as.numeric(viet_spatial1$long)))

mydm<-rdist.earth(mycoords)            # computes distance in miles!

for(i in 1:dim(mydm)[1]){mydm[i,i]=0} # renders exactly zero all diagonal elements

mydm[mydm>1000]<-0                   # all distances > 1000 miles are set to zero

mydm<-ifelse(mydm!=0,1/mydm,mydm)    # inverting distances

mydm_lw1<-mat2listw(mydm,style="W")    # create a (normalized) listw object

mydmi<-listw2mat(mydm_lw1)              # change back to 'classic' matrix, if desired

wwviet<-as.matrix(mydmi)

h1<-kronecker(wwviet,diag(1))

viet_spatial2<-viet_spatial[,c("log_civic_engagement", 
                               "bombs_per_sqm_log_wartime","lat","long","female",
                               "age","education",
                               "diff17_wartime_corr",
                               "popdensity6061_wartime","pre_avg_wartime","south_wartime",
                               "latitude_wartime_corr")]

viet_spatial2<-viet_spatial2[which(complete.cases(viet_spatial2)),]
nrow(viet_spatial2)

mycoords<-coordinates(cbind(as.numeric(viet_spatial2$lat),
                            as.numeric(viet_spatial2$long)))

mydm<-rdist.earth(mycoords)            # computes distance in miles!

for(i in 1:dim(mydm)[1]){mydm[i,i]=0} # renders exactly zero all diagonal elements

mydm[mydm>1000]<-0                   # all distances > 1000 miles are set to zero

mydm<-ifelse(mydm!=0,1/mydm,mydm)    # inverting distances

mydm_lw2<-mat2listw(mydm,style="W")    # create a (normalized) listw object

mydmi<-listw2mat(mydm_lw2)              # change back to 'classic' matrix, if desired

wwviet<-as.matrix(mydmi)

h2<-kronecker(wwviet,diag(1))

table3_column1_row13<-spreg(log_civic_engagement~bombs_per_sqm_log_wartime,
                            listw=h1,listw2=NULL,endog=NULL,data=viet_spatial1)

summary(table3_column1_row13)

table3_column2_row13<-spreg(log_civic_engagement~bombs_per_sqm_log_wartime+
                              female+age+education+I(popdensity6061_wartime/1000)+I(pre_avg_wartime/100)+
                              south_wartime + latitude_wartime_corr,listw=h2,listw2=NULL,endog=NULL,instruments=NULL,
                            data=viet_spatial2)

summary(table3_column2_row13)


table3_column3_row13<-spreg(log_civic_engagement~female+age+education,
                            listw=h2,listw2=NULL,endog=~ bombs_per_sqm_log_wartime,
                            instruments=~diff17_wartime_corr,
                            data = viet_spatial2)

summary(table3_column3_row13)

table3_column3.1_row13<-spreg(log_civic_engagement~popdensity6061_wartime,
                              listw=h2,listw2=NULL,endog=~ bombs_per_sqm_log_wartime,
                              instruments=~diff17_wartime_corr,
                              data = viet_spatial2)

summary(table3_column3.1_row13)

table3_column4_row13<-spreg(log_civic_engagement~female+age+education+
                              I(popdensity6061_wartime/1000)+
                              I(pre_avg_wartime/100)+south_wartime+ latitude_wartime_corr
                            ,listw=h2,listw2=NULL,endog=~bombs_per_sqm_log_wartime,
                            instruments=~diff17_wartime_corr,data=viet_spatial2)

summary(table3_column4_row13)


###########
#         #
# TABLE 4 #
#         #
###########

summary(table4column1 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + I(bombs_per_sqm_log_contemp - bombs_per_sqm_log_wartime), data= pnas_surveydata))

N <- nobs(table4column1)
G <- length(unique(pnas_surveydata$province_wartime))-1
dfa <- (G/(G - 1)) * (N - 1)/table4column1$df.residual

cov <- dfa * vcovHC(table4column1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab4col1 <- sqrt(diag(cov))
coeftest(table4column1, vcov=cov)

summary(table4column2 <- lm(log_civic_engagement ~ bombs_per_sqm_log_wartime + I(bombs_per_sqm_log_contemp - bombs_per_sqm_log_wartime) + female + age + education 
                            + popdensity6061_wartime + diff_popdensity6061_2001_1975 + pre_avg_wartime + diff_pre_avg_2001_1975 + south_wartime + diff_south_2001_1975 + latitude_wartime_corr + diff_latitude_2001_1975_corr, data= pnas_surveydata))

N <- nobs(table4column2)
dfa <- (G/(G - 1)) * (N - 1)/table4column2$df.residual

cov <- dfa * vcovHC(table4column2, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab4col2 <- sqrt(diag(cov))
coeftest(table4column2, vcov=cov)

summary(table4column3 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + I(bombs_per_sqm_log_contemp - bombs_per_sqm_log_wartime) |
                                 . - bombs_per_sqm_log_wartime - I(bombs_per_sqm_log_contemp - bombs_per_sqm_log_wartime) 
                               + diff_17_wartime + I(diff_17_contemp - diff_17_wartime), data= pnas_surveydata))

N <- table4column3$nobs
dfa <- (G/(G - 1)) * (N - 1)/table4column3$df.residual
cov <- dfa * vcovHC(table4column3, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab4col3 <- sqrt(diag(cov))
coeftest(table4column3, vcov=cov)

summary(table4column3.1 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + I(bombs_per_sqm_log_contemp - bombs_per_sqm_log_wartime) + popdensity6061_wartime + diff_popdensity6061_2001_1975 |
                                 . - bombs_per_sqm_log_wartime - I(bombs_per_sqm_log_contemp - bombs_per_sqm_log_wartime) + popdensity6061_wartime + diff_popdensity6061_2001_1975
                               + diff17_wartime_corr + I(diff17_contemp_corr - diff17_wartime_corr), data= pnas_surveydata))

N <- table4column3.1$nobs
dfa <- (G/(G - 1)) * (N - 1)/table4column3.1$df.residual
cov <- dfa * vcovHC(table4column3.1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab4col3.1 <- sqrt(diag(cov))
coeftest(table4column3.1, vcov=cov)

summary(table4column4 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + I(bombs_per_sqm_log_contemp - bombs_per_sqm_log_wartime) + female + age + education 
                               + popdensity6061_wartime + diff_popdensity6061_2001_1975 + pre_avg_wartime + diff_pre_avg_2001_1975 + south_wartime + diff_south_2001_1975 + latitude_wartime_corr + diff_latitude_2001_1975_corr |
                                 . - bombs_per_sqm_log_wartime - I(bombs_per_sqm_log_contemp - bombs_per_sqm_log_wartime) 
                               + diff17_wartime_corr + I(diff17_contemp_corr - diff17_wartime_corr), data= pnas_surveydata), diagnostics=TRUE)

N <- table4column4$nobs
dfa <- (G/(G - 1)) * (N - 1)/table4column4$df.residual
cov <- dfa * vcovHC(table4column4, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tab4col3 <- sqrt(diag(cov))
coeftest(table4column4, vcov=cov)

stargazer(table4column1, table4column2, table4column3,
          se = list(cluster_se_tab4col1, cluster_se_tab4col2, cluster_se_tab4col3),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)

stargazer(table4column3.1, table4column4,
          se = list(cluster_se_tab4col3.1, cluster_se_tab4col3),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05), digits=2)


##############
#            #
# Appendix X #
#            #
##############

summary(table1_appX_column1 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100)
                                     + south_wartime + I(latitude_wartime_corr/10) | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table1_appX_column1$n
dfa <- (G/(G - 1)) * (N - 1)/table1_appX_column1$df.residual

cov <- dfa * vcovHC(table1_appX_column1, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tabappXcol1 <- sqrt(diag(cov))
coeftest(table1_appX_column1, vcov=cov)

summary(table1_appX_column2 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100)
                                     + south_wartime | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table1_appX_column2$n
dfa <- (G/(G - 1)) * (N - 1)/table1_appX_column2$df.residual

cov <- dfa * vcovHC(table1_appX_column2, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tabappXcol2 <- sqrt(diag(cov))
coeftest(table1_appX_column2, vcov=cov)

summary(table1_appX_column3 <- ivreg(log_civic_engagement ~ bombs_per_sqm_log_wartime + female + age + education + I(popdensity6061_wartime/1000) + I(pre_avg_wartime/100)
                                    + I(latitude_wartime_corr/10) | . - bombs_per_sqm_log_wartime + diff17_wartime_corr, data = pnas_surveydata), diagnostics=TRUE)

N <- table1_appX_column3$n
dfa <- (G/(G - 1)) * (N - 1)/table1_appX_column3$df.residual

cov <- dfa * vcovHC(table1_appX_column3, type = "HC0", cluster = ~province_wartime, adjust = T)
cluster_se_tabappXcol3 <- sqrt(diag(cov))
coeftest(table1_appX_column3, vcov=cov)

stargazer(table1_appX_column1, table1_appX_column2, table1_appX_column3,
          se = list(cluster_se_tabappYcol1, cluster_se_tabappYcol2, cluster_se_tabappYcol3),
          omit.stat = "f",
          star.cutoffs = c(0.1, 0.05, 0.001), digits=2)


